import re
import shutil
import time

import requests
import os
from util.header import get_headers_init_user_agent
from util.自定义logging import get_logger

# Directory the script saves its downloads into. It is wiped at start-up so
# the folder only ever holds the files written by the current run.
root = 'static/喜马拉雅'
if os.path.exists(root):
    shutil.rmtree(root)
# makedirs (rather than mkdir) also creates the parent 'static' directory,
# so the script no longer crashes when it is run from a fresh checkout.
os.makedirs(root)

# Script-wide logger used for all progress messages below.
# NOTE(review): the argument is presumably the log file name — confirm
# against get_logger in util.
logger = get_logger('喜马拉雅.log')

def clean_filename(filename):
    r"""Return *filename* with every forbidden character replaced by ``_``.

    The characters replaced are ``< > : " / \ | ? * &``; all other
    characters are returned unchanged.
    """
    forbidden = re.compile(r'[<>:"/\\|?*&]')
    return forbidden.sub('_', filename)

# Album listing endpoint; the query string asks for page 1 with 56 entries
# of categoryId 3.
url = 'https://www.ximalaya.com/revision/category/v2/albums?pageNum=1&pageSize=56&sort=1&categoryId=3'
headers = get_headers_init_user_agent()

logger.info('开始爬取')
# requests applies no timeout unless one is passed, so without it a stalled
# connection would hang the script forever.
res = requests.get(url, headers=headers, timeout=10)
# Fail right here on an HTTP error status instead of later with an obscure
# JSON/KeyError when the body is parsed.
res.raise_for_status()
logger.info('爬取网页结束')


# Per-request timeout (seconds) for each image download; requests would
# otherwise wait indefinitely on a stalled connection.
IMAGE_TIMEOUT = 10

# The album entries are read from data -> albums of the listing response.
items = res.json()['data']['albums']
for item in items:
    # Sanitize the title because it is used as the file name below.
    title = clean_filename(item['albumTitle'])
    img_url = f"https://imagev2.xmcdn.com/{item['albumCoverPath']}"
    logger.info(f'开始爬取图片{title}')
    try:
        img_res = requests.get(img_url, headers=headers, timeout=IMAGE_TIMEOUT)
        # Reject HTTP error responses so an error page is never written to
        # disk under a .jpg name.
        img_res.raise_for_status()
    except requests.RequestException as exc:
        # One failed image should not abort the whole run: log and move on.
        logger.warning(f'爬取图片{title}失败: {exc}')
    else:
        logger.info(f'爬取图片{title}结束')
        print(img_url)
        logger.info(f'开始保存图片{title}')
        with open(f'{root}/{title}.jpg', 'wb') as f:
            f.write(img_res.content)
        logger.info(f'已保存图片{title}')
    # Pause between requests whether or not the download succeeded.
    time.sleep(1)

logger.info('爬虫程序结束')



